clear
set more off
capture log close
*macro drop _all
cd "P:\2018\186"

/* SEQUENTIAL MODEL */
local model_descr "Sequential model: decomposition for parents only (pi from Yc, force pi1 same on eta_ep, eta_p)"


gl data_dir "P:\2018\186/use"
gl save_dir "P:\2018\186/Martin/Decompositions_SE_extLLSMM"

* CHOOSE subsample  

  *income years for children (not survey years)
 loc yrmin= ${yrmin} // earliest possible=1985	
 loc yrmax= ${yrmax} // latest possible=2016
  
	*STUBNAME for saving log file and output matrix/dataset 
	local modelnum "2b"
	global stubfilenm "SE_model_`modelnum'_seq_`yrmin'_`yrmax'"
  
  
	*LOG file 
	log using ${save_dir}/${stubfilenm}.log, replace
  
  *child ages to observe their adult incomes
  loc agemin=25 // youngest possible=25
  loc agemax=48 // oldest possible=48
  
  *child birth cohorts to use
  loc cohortmin=1952 // earliest possible=1952
  loc cohortmax=1994 // latest possible=1991



*USE the ranked main sample DATA (children matched to mother or father)
use *idnr *earn* *yob *obsy *max* *edu* *emp* year woman if year>=1985 using "use\swepanel_zeros", replace

/*
set seed 12354586
gen r=runiform()		// Draw sample for speed
keep if r<0.2
*/
* Rename variable names

rename pmearn pm_LAB
rename pfearn pf_LAB
rename pearn pLAB

rename memp m_emplavg
rename femp f_emplavg
rename emp employ

rename medu m_schmax
rename fedu f_schmax
rename edu schmax

ren idnr newid
ren midnr m_newid
ren fidnr f_newid

gen AGEC=year-yob		// Age variables and normalization
gen AGEC1=AGEC-40		
g f_LABAGE_1=fobsy-fyob
g m_LABAGE_1=mobsy-myob

forval i=2/4{
	g AGEC`i'=AGEC1^`i'
	g f_LABAGE_`i'=f_LABAGE_1^`i'
	g m_LABAGE_`i'=m_LABAGE_1^`i'
}


* SAMPLE selection

* Use mother's sample only
 keep if pm_LAB!=. & m_LABAGE_1!=.
 keep if m_emplavg!=.
 keep if m_schmax!=.
 
 
* RESDIUALIZE all measures 
 foreach var in pLAB employ schmax pm_LAB m_emplavg m_schmax  {
	qui regress `var' m_LABAGE_? AGEC? i.year
	predict `var'_r, resid
 }
 
 * Subsample (pool sons and daughters for now)
 keep if inrange(year,`yrmin',`yrmax') & inrange(AGEC,`agemin',`agemax')
 tab year, su(AGEC)
 
* ESTIMATE MODEL 

scalar drop _all
cap noi drop eta_* 
cap noi drop yhat* 
cap noi drop uhat*

*Sample sizes
qui reg pLAB_r pm_LAB_r, cluster(newid)
scalar N=e(N)
scalar Nkids=e(N_clust)
qui reg pLAB_r pm_LAB_r, cluster(m_newid)
scalar Nmothers=e(N_clust)

scalar list N Nkids Nmothers 


* "IGE"
 reg pLAB_r pm_LAB_r 
 scalar IGE=_b[pm_LAB_r]
	
* Parent employment: Ep = Hp + eta_ep 				
 reg m_emplavg_r m_schmax_r 	
	predict eta_ep, resid 	
	scalar phi=_b[m_schmax_r] 

* Parent income: Yp = O_0p*Ep + O_1p*Hp + eta_p
reg pm_LAB_r m_schmax_r m_emplavg_r  	


* Parent Income: Yp = (O_0p+O_1p)*Hp + O_0p*eta_ep + eta_p		
reg pm_LAB_r m_schmax_r eta_ep
	predict eta_p, resid 
		
	scalar O_p=_b[m_schmax_r] 
	scalar O_0p=_b[eta_ep] 	 
	scalar O_1p=O_p-(O_0p*phi)
	
	scalar list O_p O_0p O_1p phi
	
	
* Check predicted parent income rank.
cap noi drop *_pred
   gen pm_LAB_pred=O_p*m_schmax_r + O_0p*eta_ep + eta_p  		
   sum pm_LAB_r pm_LAB_pred  
   corr pm_LAB_r pm_LAB_pred 
   corr pm_LAB_r pm_LAB_pred, cov 
   reg pLAB_r pm_LAB_r
   reg pLAB_r pm_LAB_pred
               	 
			 	
* Child income to estimate pi
generate eta_epp=(O_0p*eta_ep)+eta_p 
	reg pLAB_r m_schmax_r eta_epp
	scalar pi1= _b[eta_epp] 
	scalar pi2= _b[m_schmax_r]-((_b[eta_epp])*O_p) 
		
scalar list pi1 pi2 

		
	
* DECOMPOSITION 
		

	*Get variances of parent income elements
	 su m_schmax_r
	 scalar vHp=r(Var)
	 su eta_ep 
	 scalar vEta_ep=r(Var)
	 su eta_p 
	 scalar vEta_p=r(Var)
	 scalar list vHp vEta_ep vEta_p 
	 	
	*Calc var(Y) using estimated components
	 scalar vYp=(O_p^2)*vHp + (O_0p^2)*vEta_ep +vEta_p
	 scalar list vYp	 	 
	 qui su pm_LAB_r // check against observed y var
	 di "pm_LAB_r var=" r(Var) 	
	 
	 
	
* IGE calculation
	scalar B= (pi1+(pi2/O_p))*(O_p^2)*(vHp/vYp) + (pi1*(O_0p^2)*(vEta_ep/vYp)) + pi1*(vEta_p/vYp)  



*SAVE OUTPUT

clear 
local IGAs		"IGE B"
local pis 		"pi1 pi2"
local varcovs 	"vYp vHp vEta_ep vEta_p"
local param		"O_0p O_1p O_p phi"  
local Ns		"N Nkids Nmothers"

local rownamelist ""
matrix $stubfilenm=(0)
foreach xx in `IGAs' `pis' `varcovs' `param' `Ns' {
	sca y=`xx'
	matrix $stubfilenm=$stubfilenm,y
	local varnamelist "`varnamelist' `xx'"
}
matrix ${stubfilenm}=${stubfilenm}[....,2....]
matrix colnames ${stubfilenm}=`varnamelist'

svmat2 ${stubfilenm}, names(col)
gen model_descr="`model_descr'"
gen modelnum="`modelnum'"
gen yearmin=`yrmin'
gen yearmax=`yrmax'

save ${save_dir}/${stubfilenm}.dta, replace



clear
log close 
